# -*- coding: utf-8 -*-
from urllib.parse import parse_qs, urlparse

import scrapy

from spCrawl.items import DoubanItem


class DoubanSpider(scrapy.Spider):
    """Crawl the Douban Movie Top 250 list.

    Pages look like:
        https://movie.douban.com/top250?start=0&filter=
        https://movie.douban.com/top250?start=25&filter=
        ...
        https://movie.douban.com/top250?start=225&filter=
    Each page holds 25 entries; for every entry a ``DoubanItem`` with
    title, score, quote (``content``) and cast/director info is yielded.
    """

    name = 'douban'
    allowed_domains = ['douban.com']
    url = "https://movie.douban.com/top250?start="
    offset = 0          # kept for backward compatibility; pagination no longer mutates it
    end = "&filter="
    start_urls = [url + str(offset) + end]

    # Total number of entries on the list; start=225 is the last page.
    TOTAL = 250
    PAGE_SIZE = 25

    def parse(self, response):
        """Parse one listing page: yield an item per movie, then the next page.

        Fixes two defects of the original implementation:
        * ``extract()[0]`` raised ``IndexError`` for entries without a
          quote (a few Top250 movies have none); ``.get()`` returns
          ``None``/a default instead.
        * Pagination used shared spider state (``self.offset``) and a
          ``while`` loop that emitted every follow-up request from the
          first response; the offset is now read from ``response.url``
          so each page schedules exactly its successor.
        """
        for entry in response.xpath("//div[@id='content']//div[@class='info']"):
            item = DoubanItem()
            item["title"] = entry.xpath(
                ".//div[@class='hd']/a/span[@class='title'][1]/text()").get()
            item["score"] = entry.xpath(
                ".//div[@class='bd']//span[@class='rating_num']/text()").get()
            # Some entries have no quote — default to "" instead of crashing.
            item["content"] = entry.xpath(
                ".//div[@class='bd']//p[@class='quote']/span/text()").get(default="")
            item["info"] = entry.xpath(".//div[@class='bd']/p/text()").get()

            # Hand the item to the pipeline.
            yield item

        # Determine which page this response was, then request the next one.
        query = parse_qs(urlparse(response.url).query)
        current = int(query.get("start", ["0"])[0])
        next_offset = current + self.PAGE_SIZE
        if next_offset < self.TOTAL:
            yield scrapy.Request(
                url=self.url + str(next_offset) + self.end,
                callback=self.parse,
            )